setwd("C:/Users/eas24f/OneDrive - Florida State University/Research/Inertia RESTAT/Data") # Office computer directory
fill_missing <- FALSE
keep_missing <- FALSE

	if (fill_missing) {
		choices <- get(load("ca_choice_matrix_JUN182021_net_fillmissing"))
		previous_choices <- get(load("ca_previous_choice_matrix_JUN182021_net_fillmissing"))	
		new_choices <- get(load("ca_new_choice_matrix_JUN182021_net_fillmissing"))
		premiums <- get(load("ca_premium_matrix_JUN182021_net_fillmissing"))
		avs <- get(load("ca_AV_matrix_JUN182021_net_fillmissing"))
		subsidies <- get(load("ca_subsidy_matrix_JUN182021_net_fillmissing"))
		households <- get(load("ca_household_characteristics_JUN182021_net_fillmissing"))
		network_breadth <- get(load("ca_network_breadth_matrix_JUN182021_net_fillmissing"))	
		network_inclus <- get(load("ca_network_inclus_matrix_JUN182021_net_fillmissing"))	
		network_pairinclus <- get(load("ca_network_pairinclus_matrix_JUN182021_net_fillmissing"))	
	} else if (keep_missing) {
		choices <- get(load("ca_choice_matrix_JUN182021_net_keepmissing"))
		previous_choices <- get(load("ca_previous_choice_matrix_JUN182021_net_keepmissing"))	
		new_choices <- get(load("ca_new_choice_matrix_JUN182021_net_keepmissing"))
		premiums <- get(load("ca_premium_matrix_JUN182021_net_keepmissing"))
		avs <- get(load("ca_AV_matrix_JUN182021_net_keepmissing"))
		subsidies <- get(load("ca_subsidy_matrix_JUN182021_net_keepmissing"))
		households <- get(load("ca_household_characteristics_JUN182021_net_keepmissing"))
		network_breadth <- get(load("ca_network_breadth_matrix_JUN182021_net_keepmissing"))	
		network_inclus <- get(load("ca_network_inclus_matrix_JUN182021_net_keepmissing"))	
		network_pairinclus <- get(load("ca_network_pairinclus_matrix_JUN182021_net_keepmissing"))	
		
		# Remove all households that have no network breadth values (e.g., all 2019 households)
			# uninsured plan has 0 breadth, so sum is at least 1
		keep_indices <- setdiff(1:nrow(households),which(rowSums(!is.na(network_breadth)) == 1))

		choices <- choices[keep_indices,]
		previous_choices <- previous_choices[keep_indices,]
		new_choices <- new_choices[keep_indices,]
		premiums <- premiums[keep_indices,]
		avs <- avs[keep_indices,]
		subsidies <- subsidies[keep_indices,]
		households <- households[keep_indices,]
		network_breadth <- network_breadth[keep_indices,]
		network_inclus <- network_inclus[keep_indices,]
		network_pairinclus <- network_pairinclus[keep_indices,]

		# Need to get rid of anyone without a choice
		# Need to get rid of anyone with only uninsured as a choice

		nb <- network_breadth[,1:(ncol(network_breadth)-1)]
		nb[which(nb == 0)] <- NA
		network_breadth[,1:(ncol(network_breadth)-1)] <- nb
		valid_choices <- as.vector(!is.na(t(choices + network_breadth + network_inclus + network_pairinclus)))	
		
		output_to_check <- expand.grid(colnames(choices),1:nrow(choices))[valid_choices,]
		output_to_check$choice <- as.vector(t(choices))[valid_choices]
		
		choices_per_household <- by(output_to_check[,3],output_to_check[,2],sum)
		choice_set_size_per_household <- by(output_to_check[,2],output_to_check[,2],length)
		households_to_drop <- as.numeric(union(which(choices_per_household == 0),
			which(choice_set_size_per_household == 1)))

		keep_indices <- setdiff(1:nrow(households),households_to_drop) 
		
		choices <- choices[keep_indices,]
		previous_choices <- previous_choices[keep_indices,]
		new_choices <- new_choices[keep_indices,]
		premiums <- premiums[keep_indices,]
		avs <- avs[keep_indices,]
		subsidies <- subsidies[keep_indices,]
		households <- households[keep_indices,]
		network_breadth <- network_breadth[keep_indices,]
		network_inclus <- network_inclus[keep_indices,]
		network_pairinclus <- network_pairinclus[keep_indices,]

	} else {
		choices <- get(load("ca_choice_matrix_JUN182021_net"))
		previous_choices <- get(load("ca_previous_choice_matrix_JUN182021_net"))	
		new_choices <- get(load("ca_new_choice_matrix_JUN182021_net"))
		premiums <- get(load("ca_premium_matrix_JUN182021_net"))
		avs <- get(load("ca_AV_matrix_JUN182021_net"))
		subsidies <- get(load("ca_subsidy_matrix_JUN182021_net"))
		households <- get(load("ca_household_characteristics_JUN182021_net"))
		network_breadth <- get(load("ca_network_breadth_matrix_JUN182021_net"))	
		network_inclus <- get(load("ca_network_inclus_matrix_JUN182021_net"))	
		network_pairinclus <- get(load("ca_network_pairinclus_matrix_JUN182021_net"))	
	}

	
##### Initialize output object

	if (keep_missing) {
		
		# If plan has 0 network breadth, replace with NA
		nb <- network_breadth[,1:(ncol(network_breadth)-1)]
		nb[which(nb == 0)] <- NA
		network_breadth[,1:(ncol(network_breadth)-1)] <- nb
		valid_choices <- as.vector(!is.na(t(choices + network_breadth + network_inclus + network_pairinclus)))	
	
	} else {
		valid_choices <- as.vector(!is.na(t(choices)))		
	}
	julia_output <- expand.grid(colnames(choices),1:nrow(choices))[valid_choices,]
	gc()
	
##### Save year and weight from household object and then delete
	
	household_years <- rep(households[rownames(choices),"year"],each=ncol(choices))[valid_choices]	
	weight <- rep(households[rownames(choices),"weight"],each=ncol(choices))[valid_choices]
	penalties <- rep(households[rownames(choices),"penalty"]/12,each=ncol(choices))[valid_choices]
	#rm(households)
	gc()
		
##### Reduce memory of premium and choice objects
	
	num_households <- nrow(choices)
	num_plans <- ncol(choices)
	
	# Reduce memory
	choices <- as.vector(t(choices))[valid_choices]
	gc()
	
	previous_choices <- as.vector(t(previous_choices))[valid_choices]
	gc()
	
	new_choices <- as.vector(t(new_choices))[valid_choices]
	gc()
	
	premiums <- as.vector(t(premiums))[valid_choices] - penalties
	gc()
	
	avs <- as.vector(t(avs))[valid_choices]
	
	subsidies <- as.vector(t(subsidies))[valid_choices]
	gc()
	
	network_breadth <- as.vector(t(network_breadth))[valid_choices]
	gc()
	
	network_inclus <- as.vector(t(network_inclus))[valid_choices]
	gc()
	
	network_pairinclus <- as.vector(t(network_pairinclus))[valid_choices]
	gc()
	
##### To send to Julia
	
	julia_output$choice <- choices
	rm(choices)
	gc()
	
	julia_output$previous_choice <- previous_choices
	julia_output[is.na(julia_output$previous_choice),"previous_choice"] <- 0
	
	julia_output$new_plan <- new_choices
	julia_output$premium <- premiums
	julia_output$av <- avs
	julia_output$subsidy <- subsidies
	julia_output$penalty <- penalties
	julia_output$network_breadth <- network_breadth
	julia_output$network_inclus <- network_inclus
	julia_output$network_pairinclus <- network_pairinclus
	rm(new_choices)
	rm(premiums)
	rm(avs)
	rm(subsidies)
	rm(penalties)
	rm(network_breadth)
	rm(network_inclus)
	gc()
	
	julia_output$year <- household_years
	rm(household_years)
	gc()
	
	julia_output$weight <- weight
	rm(weight)
	gc()
		
	julia_output$uninsured_plan <- rep(c(rep(0,num_plans-1),1),num_households)[valid_choices]
	julia_output[julia_output$uninsured_plan == 1,c("premium","subsidy")] <- 0
	
	##### Create Choice Set Identifier
	# We want to identify all households with the same choice set
	# This varies within rating area because of heterogeneous firm entry
	
	households$choice_set_name <- paste(households$rating_area,households$year,households$num_choices,sep="_")
	
		# This gives the unique choice set name for everything except 2 zip-region-years ("956_1_2016","957_1_2016")
		households[households$zip_region_year == "957_1_2016","choice_set_name"] <- "1_2016_16_2"
	
	unique_choice_sets <- households[!duplicated(households$choice_set_name),c("choice_set_name","num_choices","year")]
	households$choice_set_id <- NA
	for(j in 1:nrow(unique_choice_sets)) {
		households[households$choice_set_name == unique_choice_sets[j,"choice_set_name"],"choice_set_id"] <- j
	}
	
	colnames(julia_output) <- c("plan_name","household_number","choice","previous_choice","new_plan",
		"premium","av","subsidy","penalty","network_breadth","network_inclus","network_pairinclus","year","weight","uninsured_plan")
	julia_output$choice_set_id <- households[julia_output$household_number,"choice_set_id"]
	julia_output$household_id <- households[julia_output$household_number,"household_id"]
	
	# Network missing for some households
	#good_households <- setdiff(1:nrow(households),unique(julia_output[is.na(julia_output$network_inclus),"household_number"]))
		
		#households$new_household_number <- NA
		#households[good_households,"new_household_number"] <- 1:length(good_households)
		
		#julia_output <- julia_output[julia_output$household_number %in% good_households,]
		#julia_output$household_number <- households[julia_output$household_number,"new_household_number"]
		
		#households <- households[good_households,]
		#households$new_household_number <- NULL
		
		if (fill_missing) {
			write.csv(julia_output,file="ca_julia_data_JUN182021_net_fillmissing.csv",row.names=FALSE)	
			write.csv(unique_choice_sets,file="unique_choice_sets_JUN182021_net_fillmissing.csv",row.names=FALSE)
		} else if (keep_missing) {
			write.csv(julia_output,file="ca_julia_data_JUN182021_net_keepmissing.csv",row.names=FALSE)	
			write.csv(unique_choice_sets,file="unique_choice_sets_JUN182021_net_keepmissing.csv",row.names=FALSE)
			write.csv(households,file="ca_household_characteristics_JUN182021_net_keepmissing.csv",row.names=FALSE)	
		} else {
			write.csv(julia_output,file="ca_julia_data_JUN182021_net.csv",row.names=FALSE)	
			write.csv(unique_choice_sets,file="unique_choice_sets_JUN182021_net.csv",row.names=FALSE)
		}
		
			
		#write.csv(households,file="ca_household_characteristics_JUN182021_net.csv",row.names=FALSE)		
		#save(households,file="ca_household_characteristics_JUN182021_net")	
		

	gc()
	
	